
***********
Carly Knight
MSA.PMSA Level
4/2/10

Description: This program combines datasets from exported ArcGIS DBF Files with Various Census Tract Files. 

(1) UrbanAreas: File with indicator if census tract fell within a designated "urban area." Calculated Using GIS Arcmap
(2) Analysisfile_ctract_msapmsa: File exported from ArcGIS with assortment of main census variables. This should contain population variables and calculation of 
	census tract area.
(3)	Residentialstability: Data from Geolytics with Residential Stability and Fips
(4) Socialexplorer_fampov: Data from Social Explorer with Poverty Rate Status
(5) Historical Data from Geolytics:
	C70: 1970 Relevant Variables
	C80: 1980 Relevant Variables
	C90P: 1990 Relevant Variables

RESULT:
Creates 2 levels of data:
(1) analysisfile_ctract_historical.csv: Census-tract level dataset including historical data
(2) analysisfile_msapmsa_historical.csv: MSA level dataset including an indicator for all those PMSAS/MSAS which subsume census-tract level.
***********;

*create file for urban areas to be merged onto center city;

/* Library read for the urban-area tract table (urban.urbanareas). */
libname urban "L:\Small Replication\NewModels\data\TablesforGIS";
/* Library read for the ArcGIS-exported tract file (main.analysisfile_ctract_msapmsa). */
libname main "L:\Small Replication\NewModels\data\Datafiles\census_msapmsa";

/* Library read for residentialstability and foreignborn1. */
libname restab "L:\Small Replication\filesformariopaperreplication\data\Censusdata";
/* Library read for socialexplorer_fampov. */
libname tract2 "L:\Data\Census_data\National Data by Census Tract";
/* NOTE(review): this libref is not referenced in the visible part of the program - confirm it is still needed. */
libname export "L:\Small Replication\filesformariopaperreplication\ArcGIS\data\ForHLMAnalysis";

/* Historical tract libraries used by the 1980/1990 merges and the census macro
   (c80 and c90 are also written to: c80.c8 and c90.c9 are created there). */
libname c70 "L:\Data\Neighborhood_Change_Database\1970";
libname c80 "L:\Data\Neighborhood_Change_Database\1980";
libname c90 "L:\Data\Neighborhood_Change_Database\1990";

**************************************************************************************************************************************************************;
*export urban areas file for arcgis;
/* Reduce the urban-area table to the tract key and the urban flag, then write it out as a DBF file. */
data urbanareas_trunc;
    set urban.urbanareas;
    keep fips urbanarea;
run;

proc export
    data    = urbanareas_trunc
    outfile = "L:\Small Replication\NewModels\data\GISShapefile\CensusTractLevel\urbanareas.dbf"
    dbms    = dbf
    replace;
run;
***************************************************************************************************************************************************************;

*PUT TOGETHER MAIN FILE WITH CENSUS2000 VARIABLES;
/* Build WORK.main from the ArcGIS tract export: numeric tract key, race shares,
   log population density, centre-city / urban-area flags, area and business count.
   Rows with zero population, zero density or zero area are removed. */
data main;
    set main.analysisfile_ctract_msapmsa (rename = (fips = _fips_txt));

    /* Replace the incoming fips with a numeric version of the same name. */
    fips = input(_fips_txt, 15.);
    drop _fips_txt;

    /* ---- independent variables ---- */

    /* Race shares of the 2000 population; tracts with no population are dropped first
       so the divisions below never see a zero denominator. */
    if pop2000 = 0 then delete;
    percent_white    = white    / pop2000;
    percent_black    = black    / pop2000;
    percent_hispanic = hispanic / pop2000;
    percent_asian    = asian    / pop2000;

    /* Log of population density; zero-density tracts are dropped before taking the log. */
    if pop00_sqmi = 0 then delete;
    lpopden = log(pop00_sqmi);

    centercity = max_count;
    urbanarea  = max_urbana;

    /* Tract area; zero-area tracts are dropped. */
    if sqmi = 0 then delete;
    area = sqmi;

    /* ---- dependent variables ---- */

    /* Total business count. */
    bsntotal = sum_count;
run;

/* Copy smhse0 from the residential-stability file into res_instab and label it. */
/* NOTE(review): the label text below describes movers from the central city, while the
   census macro later in this program assigns smhse<year> to the same-house variable
   (psmh<year>). Confirm what smhse0 actually measures in this file. */
data res_instab;
    set restab.residentialstability;
    res_instab = smhse0;
    attrib res_instab
        label = "Persons 5+ years old who moved from the central city of the same metro area within the past 5 years";
run;

/* Read the family-poverty file, convert the poverty share to numeric and flag
   concentrated-poverty tracts at the 40% and 30% thresholds. */
data tract2;
    set tract2.socialexplorer_fampov;

    /* Use a plain w. informat. With the previous 10.7 informat, any value written
       without a decimal point was divided by 10**7 (for example the text 1 was read
       as 0.0000001). Values that carry an explicit decimal point are read exactly
       as before. */
    percent_pov = input(percentfam_pov, 10.);

    /* A missing percent_pov compares as not greater than the threshold, so both flags are 0. */
    if percent_pov > 0.40 then conc_pov40 = 1;
    else conc_pov40 = 0;

    if percent_pov > 0.30 then conc_pov30 = 1;
    else conc_pov30 = 0;
run;
*foreign born;
/* Read the foreign-born file and convert the foreign-born share to numeric. */
data foreign;
    set restab.foreignborn1;

    /* Use a plain w. informat. With the previous 10.5 informat, any value written
       without a decimal point was divided by 10**5 (for example the text 1 was read
       as 0.00001). Values that carry an explicit decimal point are read exactly
       as before. */
    prop_for = input(pforeign, 10.);
run;

/* Order every input by its tract key so the BY-merge below is valid. */
proc sort data = main;
    by fips;
run;

proc sort data = tract2;
    by geo_fips;
run;

proc sort data = res_instab;
    by geo2000;
run;

proc sort data = foreign;
    by geo_fips;
run;

/* Combine the four tract-level inputs on a common key named fips.
   Only tracts that are present in WORK.main are kept. */
data tract_final;
    merge main       (in = in_main)
          tract2     (rename = (geo_fips = fips))
          foreign    (rename = (geo_fips = fips))
          res_instab (rename = (geo2000 = fips));
    by fips;
    if not in_main then delete;
run;
/* Year-2000 analysis variables: short names with a trailing 0, interaction terms,
   the metro key and both numeric and character versions of the tract identifier.
   Assignment order is kept as in the original so the output column order is unchanged. */
data analysisfile_ctract_pmsa_01 (keep = FIPS fipschar geo2000 areakey area pop0 pwht0 pblck0 phisp0 pfor0 pasian
                                         ppov0 pdfh0 psmh0 ipovblk0 ipovfor0 ccpov ccurb bsntotal lpopden centercity urbanarea
                                         conc_pov40 conc_pov30);
    set tract_final;

    pop0   = pop2000;
    pwht0  = percent_white;
    pblck0 = percent_black;
    phisp0 = percent_hispanic;
    pfor0  = prop_for;
    pasian = percent_asian;
    ppov0  = percent_pov;

    /* NOTE(review): res_instab is a copy of smhse0. The census macro later in this program
       sets psmh<year> = smhse<year> and pdfh<year> = 1 - smhse<year>, which is the opposite
       orientation to the two lines below. Confirm which one matches the source data before
       relying on pdfh0, psmh0 or any difference built from them. */
    pdfh0 = res_instab;
    psmh0 = 1 - res_instab;

    /* Interaction terms. */
    ipovblk0 = percent_black * percent_pov;
    ipovfor0 = ppov0 * pfor0;
    ccpov    = centercity * ppov0;
    ccurb    = urbanarea * ppov0;

    areakey = ID;

    /* Character tract identifier. Z11. pads with leading zeros, so identifiers whose state
       code is below 10 stay 11 digits long; the previous 11.0 format produced a leading
       blank for those tracts. Assumes 11-digit tract FIPS codes, as the original width implies. */
    fipschar = put(fips, z11.);

    geo2000 = fips;

    label
        pdfh0    = "proportion in a different house five years ago"
        psmh0    = "proportion in the same house five years ago"
        ipovblk0 = "Interaction between proportion in poverty and proportion black"
        ipovfor0 = "interaction between proportion in poverty and porpotion foreign"
    ;
run;


/* Working copy under the short name used by the historical merge. */
data c0;
    set analysisfile_ctract_pmsa_01;
run;

*********************************
*historic vars
******************************;

*merge 1980 files;
/* 1980: combine the two 1980 source tables into c80.c8, matched on geo2000.
   The key in c80.c81 is passed through INPUT first so it is numeric. */
data c81;
    set c80.c81 (rename = (geo2000 = _geo_txt));
    geo2000 = input(_geo_txt, 12.);
    drop _geo_txt;
run;

proc sort data = c81;
    by geo2000;
run;

/* Sorted WORK copy of the second table; the library copy is left untouched. */
proc sort data = c80.c82 out = c82;
    by geo2000;
run;

data c80.c8;
    merge c81 c82;
    by geo2000;
run;

*merge 1990 files;
/* 1990: the three source tables are sorted in place inside the c90 library,
   then combined into c90.c9, matched on geo2000. */
proc sort data = c90.c91;
    by geo2000;
run;

proc sort data = c90.c92;
    by geo2000;
run;

proc sort data = c90.c93;
    by geo2000;
run;

data c90.c9;
    merge c90.c91
          c90.c92
          c90.c93;
    by geo2000;
run;

*create vars across 1970, 1980, 1990 variables;
option spool;

/*
   census macro: builds WORK.c<n>, sorted by geo2000, from the table c<n> in library c<n>0
   (n = 7 reads c70.c7, n = 8 reads c80.c8, n = 9 reads c90.c9).

   Parameter
     n  single-digit decade suffix. The default value "year" is only a placeholder;
        always pass n explicitly.

   Output variables carry the suffix <n>: population, race / foreign-born shares,
   same-house and different-house shares, unemployment, poverty measures, welfare,
   concentrated-poverty flags (40% and 30%) and two poverty interactions.
   A source variable that does not exist in a given year yields missing values.

   Comments inside the macro use block-comment syntax only: asterisk-style comment
   statements are stored as macro text and scanned for macro triggers.
*/
%macro census (n = year);
    data c&n. (keep = geo2000 pop&n pblck&n pwht&n phisp&n pfor&n psmh&n pdfh&n punemp&n ppov&n ppovwht&n
                      ppovblk&n ppovhisp&n prop_welfare&n conc_pov40&n conc_pov30&n ipovblk&n ipovfor&n);
        set c&n.0.c&n;

        pop&n          = trctpop&n;
        pblck&n        = shrblk&n;
        pwht&n         = shrwht&n;
        phisp&n        = shrhsp&n;
        pfor&n         = shrfor&n;
        psmh&n         = smhse&n;
        pdfh&n         = 1 - smhse&n;
        punemp&n       = unemprt&n;
        ppov&n         = povrat&n;
        ppovwht&n      = whtpr&n;
        ppovblk&n      = blkpr&n;
        ppovhisp&n     = hispr&n;
        prop_welfare&n = welfare&n;

        /* Concentrated-poverty flags; a missing poverty rate gives 0 for both. */
        if ppov&n > 0.40 then conc_pov40&n = 1; else conc_pov40&n = 0;
        if ppov&n > 0.30 then conc_pov30&n = 1; else conc_pov30&n = 0;

        /* interactions */
        ipovblk&n = ppov&n * pblck&n;
        ipovfor&n = ppov&n * pfor&n;

        /* Replace geo2000 with a numeric version of the same name.
           The drop / rename statements take effect before the keep= option above,
           so keep= sees the renamed variable. */
        geo2000_1 = input(geo2000, 12.0);
        drop geo2000;
        rename geo2000_1 = geo2000;

        label
            ppovhisp&n = 'Prportion of hispanic households under poverty line'
            ppovwht&n  = 'Porportion of white population under poverty line'
            ppovblk&n  = 'Proportion of black population under poverty line';
    run;

    proc sort data = c&n.;
        by geo2000;
    run;
%mend census;

%census(n = 7)
%census(n = 8)   /* missing unemployment */
%census(n = 9)   /* missing samehouse */

*merge all files together for a master file;
proc sort data = c0;
    by geo2000;
run;

/* Attach the 1990, 1980 and 1970 variables to the year-2000 tracts, build the
   1970-to-2000 change scores, and keep only tracts that are complete on every
   variable listed in the array below. Tracts absent from c0 are discarded. */
data tract_pmsa_historical;
    merge c0 (in = in_2000) c9 c8 c7;
    by geo2000;
    if not in_2000 then delete;

    /* Change from 1970 to 2000. */
    chpblack07 = pblck0 - pblck7;
    chpfor07   = pfor0 - pfor7;
    chppov07   = ppov0 - ppov7;
    chdhous07  = pdfh0 - pdfh7;
    chcpov07   = conc_pov40 - conc_pov407;

    /* Change in poverty interacted with the centre-city flag. */
    ichppov07 = chppov07 * centercity;

    /* Removed from the output; these three are also left out of the completeness check. */
    drop psmh9 pdfh9 punemp8;

    /* Variables that must all be non-missing, grouped by year. */
    array required(*)
        centercity urbanarea fips lpopden area bsntotal conc_pov40 conc_pov30
        pop0 pwht0 pblck0 phisp0 pfor0 pasian ppov0 pdfh0 psmh0 ipovblk0 ipovfor0 geo2000

        pop9 pblck9 pwht9 phisp9 pfor9 punemp9 ppov9 ppovwht9 ppovblk9 ppovhisp9
        prop_welfare9 conc_pov409 conc_pov309 ipovblk9 ipovfor9

        pop8 pblck8 pwht8 phisp8 pfor8 psmh8 pdfh8 ppov8 ppovwht8 ppovblk8 ppovhisp8
        prop_welfare8 conc_pov408 conc_pov308 ipovblk8 ipovfor8

        pop7 pblck7 pwht7 phisp7 pfor7 psmh7 pdfh7 punemp7 ppov7 ppovwht7 ppovblk7 ppovhisp7
        prop_welfare7 conc_pov407 conc_pov307 ipovblk7 ipovfor7

        chpblack07 chpfor07 chppov07 chdhous07 chcpov07;

    do _k = 1 to dim(required);
        if required(_k) = . then delete;
    end;
    drop _k;
run;

*create a metro-level dataset for HLM;
/* Order the tract file by metro key. */
proc sort data = tract_pmsa_historical;
    by areakey;
run;

/* One row per areakey holding the number of tract rows under that key. */
proc sql;
    create table analysisfile_msapmsa_historical as
    select areakey,
           count(*) as tractcnt
    from tract_pmsa_historical
    group by areakey
    order by areakey;
quit;

/* Make sure the tract file is in areakey order before it is written out. */
proc sort data = tract_pmsa_historical;
    by areakey;
run;

/* Write the tract-level and metro-level files as CSV, overwriting any existing files. */
proc export
    data    = tract_pmsa_historical
    outfile = "L:\Small Replication\NewModels\data\Datafiles\census_msapmsa\analysisfile_ctract_historical.csv"
    dbms    = csv
    replace;
run;

proc export
    data    = analysisfile_msapmsa_historical
    outfile = "L:\Small Replication\NewModels\data\Datafiles\census_msapmsa\analysisfile_msapmsa_historical.csv"
    dbms    = csv
    replace;
run;





